#################################################################################
############## Lab 10: Nonlinear forecasting ############################
############## TF + Feature Engineering #############################
############## ----------- solution --------- ############################
#################################################################################
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ ggplot2 3.5.1 ✔ fma 2.5
## ✔ forecast 8.23.0 ✔ expsmooth 2.3
##
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(tseries) #contains adf.test function
library(TSA)
## Registered S3 methods overwritten by 'TSA':
## method from
## fitted.Arima forecast
## plot.Arima forecast
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
library(readxl)
library(MLTools)
## __ __ _ _________
## | \ / | | | |___ ___| _____ _____ _ _____
## | \/ | | | ____ | | | _ | | _ | | | | ___|
## | |\ /| | | | |____| | | | | | | | | | | | | |___ |
## | | \/ | | | |____ | | | |_| | | |_| | | |__ ___| |
## |_| |_| |______| |_| |_____| |_____| |____| |_____|
##
## Learning is fun, Machine Learning is funnier
## -----------------------------------------
## With great power comes great responsibility
## -----------------------------------------
## Created by: Jos<e9> Portela Gonz<e1>lez <Jose.Portela@iit.comillas.edu>
## Guillermo Mestre Marcos <Guillermo.Mestre@comillas.edu> Jaime Pizarroso Gonzalo
## <jpizarroso@comillas.edu> Antonio Mu<f1>oz San Roque
## <antonio.munoz@iit.comillas.edu>
##
## Escuela T<e9>cnica Superior de Ingenier<ed>a ICAI
library(imputeTS)
##
## Attaching package: 'imputeTS'
##
## The following object is masked from 'package:tseries':
##
## na.remove
##
## The following object is masked from 'package:zoo':
##
## na.locf
## Set working directory ---------------------------------------------------------------------------------------------
try(setwd(dirname(rstudioapi::getActiveDocumentContext()$path)))
# Lectura datos -------------------------------------------------------------------------------------------------
fdata <- read_excel("DAILY_DEMAND_TR_NA.xlsx")
#Convert to time series object
fdata_ts <- ts(fdata, frequency=7)
autoplot(fdata_ts, facets = TRUE)

# Imputation of missing values
ggplot_na_distribution(fdata_ts[,4])

imp_seas <- na_seasplit(fdata_ts)
ggplot_na_imputations(fdata_ts[,4], imp_seas[,4])

imp_kalman <- na_kalman(fdata_ts, model = "auto.arima")
ggplot_na_imputations(fdata_ts[,4], imp_kalman[,4])

fdata_ts <- imp_seas
#Create time series
y <- fdata_ts[,2]
x <- fdata_ts[,c(3,4)]
autoplot(cbind(y,x), facets = TRUE)

#Scale
y.tr <- y/100
x.tr <- x/1
autoplot(cbind(y.tr,x.tr), facets = TRUE)

## Identification and fitting process -------------------------------------------------------------------------------------------------------
#(The different steps followed are represented for teaching purposes)
## FIRST --------------------------------------------
#### Fit initial FT model with large s
#This arima function belongs to the TSA package
TF.fit <- arima(y.tr,
xtransf = x.tr,
order=c(1,0,0),
seasonal = list(order=c(1,0,0),period=7),
transfer = list(c(0,9),c(0,9)),
method="ML")
#Check regression error to see the need of differentiation
TF.RegressionError.plot(y.tr,x.tr,TF.fit,lag.max = 50)
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_point()`).

#Regular differentiation is needed and coefficients of explanatory variables cannot be interpreted
## SECOND --------------------------------------------
#### Fit initial FT model with large s
TF.fit <- arima(y.tr,
xtransf = x.tr,
order=c(1,1,0),
seasonal = list(order=c(1,0,0),period=7),
transfer = list(c(0,9),c(0,9)),
method="ML")
## Warning in log(s2): NaNs produced
summary(TF.fit) #summary of training errors and estimated coefficients
##
## Call:
## arima(x = y.tr, order = c(1, 1, 0), seasonal = list(order = c(1, 0, 0), period = 7),
## method = "ML", xtransf = x.tr, transfer = list(c(0, 9), c(0, 9)))
##
## Coefficients:
## ar1 sar1 WD-MA0 WD-MA1 WD-MA2 WD-MA3 WD-MA4 WD-MA5
## 0.0202 0.1081 7.7084 0.1133 0.0610 -0.0875 -0.0755 -0.0376
## s.e. 0.0227 0.0227 0.0857 0.0931 0.1052 0.1078 0.1098 0.1099
## WD-MA6 WD-MA7 WD-MA8 WD-MA9 TEMP-MA0 TEMP-MA1 TEMP-MA2 TEMP-MA3
## -0.0393 0.0273 -0.1601 -0.0428 -0.0080 -0.0091 -0.0028 -0.0051
## s.e. 0.1080 0.1052 0.0930 0.0853 0.0021 0.0021 0.0021 0.0021
## TEMP-MA4 TEMP-MA5 TEMP-MA6 TEMP-MA7 TEMP-MA8 TEMP-MA9
## -0.0027 -0.0023 -0.0052 0.0005 -0.0019 0.0048
## s.e. 0.0021 0.0021 0.0021 0.0021 0.0021 0.0021
##
## sigma^2 estimated as 0.01934: log likelihood = 1090.82, aic = -2137.65
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0001495119 0.1390487 0.1017627 -0.01655897 1.462634 0.2114647
## ACF1
## Training set 0.002126683
coeftest(TF.fit) #statistical significance of estimated coefficients
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.02021036 0.02272285 0.8894 0.3737725
## sar1 0.10810555 0.02267399 4.7678 1.862e-06 ***
## WD-MA0 7.70838271 0.08571117 89.9344 < 2.2e-16 ***
## WD-MA1 0.11326613 0.09308661 1.2168 0.2236871
## WD-MA2 0.06102610 0.10518577 0.5802 0.5617970
## WD-MA3 -0.08754697 0.10784644 -0.8118 0.4169212
## WD-MA4 -0.07551842 0.10983013 -0.6876 0.4917092
## WD-MA5 -0.03758325 0.10986361 -0.3421 0.7322831
## WD-MA6 -0.03929341 0.10795906 -0.3640 0.7158835
## WD-MA7 0.02728403 0.10520079 0.2594 0.7953637
## WD-MA8 -0.16007692 0.09302759 -1.7207 0.0852968 .
## WD-MA9 -0.04281963 0.08531323 -0.5019 0.6157303
## TEMP-MA0 -0.00800931 0.00212406 -3.7707 0.0001628 ***
## TEMP-MA1 -0.00913615 0.00212678 -4.2958 1.741e-05 ***
## TEMP-MA2 -0.00277048 0.00213453 -1.2979 0.1943095
## TEMP-MA3 -0.00508456 0.00213635 -2.3800 0.0173115 *
## TEMP-MA4 -0.00274090 0.00214421 -1.2783 0.2011492
## TEMP-MA5 -0.00230439 0.00214644 -1.0736 0.2830088
## TEMP-MA6 -0.00524246 0.00213781 -2.4523 0.0141964 *
## TEMP-MA7 0.00046739 0.00213598 0.2188 0.8267922
## TEMP-MA8 -0.00188518 0.00212468 -0.8873 0.3749309
## TEMP-MA9 0.00480513 0.00212146 2.2650 0.0235116 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Check regression error to see the need of differentiation
TF.RegressionError.plot(y.tr,x.tr,TF.fit,lag.max = 50)
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_point()`).

#Regression error is stationary, then, we can continue
#NOTE: If this regression error was not stationary in variance,boxcox should be applied to input and output series.
# Try AR(1)7 for seasonal component and ARMA(2,1) for regular component?
#Check numerator coefficients of explanatory variable
TF.Identification.plot(x.tr,TF.fit)
## [1] "WD"
## Estimate Std. Error z value Pr(>|z|)
## WD-MA0 7.70838271 0.08571117 89.9344037 0.00000000
## WD-MA1 0.11326613 0.09308661 1.2167822 0.22368710
## WD-MA2 0.06102610 0.10518577 0.5801745 0.56179695
## WD-MA3 -0.08754697 0.10784644 -0.8117743 0.41692118
## WD-MA4 -0.07551842 0.10983013 -0.6875929 0.49170919
## WD-MA5 -0.03758325 0.10986361 -0.3420901 0.73228310
## WD-MA6 -0.03929341 0.10795906 -0.3639658 0.71588352
## WD-MA7 0.02728403 0.10520079 0.2593519 0.79536371
## WD-MA8 -0.16007692 0.09302759 -1.7207467 0.08529681
## WD-MA9 -0.04281963 0.08531323 -0.5019108 0.61573030
## [1] "TEMP"
## Estimate Std. Error z value Pr(>|z|)
## TEMP-MA0 -0.0080093112 0.002124064 -3.7707484 0.0001627587
## TEMP-MA1 -0.0091361462 0.002126778 -4.2957688 0.0000174089
## TEMP-MA2 -0.0027704842 0.002134532 -1.2979354 0.1943095499
## TEMP-MA3 -0.0050845641 0.002136349 -2.3800248 0.0173114730
## TEMP-MA4 -0.0027409037 0.002144205 -1.2782842 0.2011492341
## TEMP-MA5 -0.0023043904 0.002146445 -1.0735848 0.2830088091
## TEMP-MA6 -0.0052424635 0.002137813 -2.4522552 0.0141963920
## TEMP-MA7 0.0004673889 0.002135975 0.2188176 0.8267921694
## TEMP-MA8 -0.0018851765 0.002124682 -0.8872748 0.3749309447
## TEMP-MA9 0.0048051329 0.002121456 2.2650170 0.0235116353

#WD: b=0, r=0, s=0
#TEMP: b=0, r=0, s=1
## THIRD --------------------------------------------
#### Fit model with selected values and estimate initial ARIMA structure
TF.fit <- arima(y.tr,
xtransf = x.tr,
order=c(2,1,1),
seasonal = list(order=c(1,0,0),period=7),
transfer = list(c(0,0),c(0,1)),
method="ML")
#Check regression error to identify correlation structure
summary(TF.fit) #summary of training errors and estimated coefficients
##
## Call:
## arima(x = y.tr, order = c(2, 1, 1), seasonal = list(order = c(1, 0, 0), period = 7),
## method = "ML", xtransf = x.tr, transfer = list(c(0, 0), c(0, 1)))
##
## Coefficients:
## ar1 ar2 ma1 sar1 WD-MA0 TEMP-MA0 TEMP-MA1
## 0.9054 -0.0978 -0.9093 0.1313 7.7388 -0.0077 -0.0087
## s.e. 0.0304 0.0235 0.0211 0.0235 0.0444 0.0021 0.0021
##
## sigma^2 estimated as 0.01908: log likelihood = 1108.9, aic = -2203.8
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0003025821 0.1380877 0.1009769 -0.03250181 1.446983 0.2098319
## ACF1
## Training set 0.002079806
coeftest(TF.fit) #statistical significance of estimated coefficients
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.9053787 0.0304439 29.7393 < 2.2e-16 ***
## ar2 -0.0977585 0.0235450 -4.1520 3.296e-05 ***
## ma1 -0.9093086 0.0211119 -43.0709 < 2.2e-16 ***
## sar1 0.1312836 0.0234984 5.5869 2.311e-08 ***
## WD-MA0 7.7388244 0.0443777 174.3855 < 2.2e-16 ***
## TEMP-MA0 -0.0077486 0.0020891 -3.7091 0.000208 ***
## TEMP-MA1 -0.0087230 0.0020819 -4.1900 2.790e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Check residuals
CheckResiduals.ICAI(TF.fit, lag=50)
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,1)(1,0,0)[7]
## Q* = 65.171, df = 43, p-value = 0.01615
##
## Model df: 7. Total lags used: 50
PlotModelDiagnosis(x.tr, y.tr, fitted(TF.fit), together = TRUE)
## [1] "Residuals vs WD"
## [1] "Residuals vs TEMP"
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range (`stat_smooth()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range (`stat_smooth()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

###############################################################################
# Generate new temperature variables
###############################################################################
#Plot relation between temperature and demand
ggplot(fdata)+geom_point(aes(x=TEMP, y=DEM))
## Warning: Removed 23 rows containing missing values or values outside the scale range
## (`geom_point()`).

#We can see that there is a non-linear relation between them.
#Create new time series splitting the temperatures
fdata$tcold <- sapply(fdata$TEMP,min,17)
fdata$thot <- sapply(fdata$TEMP,max,22)
fdata_ts <- ts(fdata, frequency=7)
imp_seas <- na_seasplit(fdata_ts)
fdata_ts <- imp_seas
#Create time series
y <- fdata_ts[,2]
x <- fdata_ts[,c(3,5,6)]
autoplot(cbind(y,x), facets = TRUE)

#Scale
y.tr <- y/100
x.tr <- x/1
autoplot(cbind(y.tr,x.tr), facets = TRUE)

## Identification and fitting process -------------------------------------------------------------------------------------------------------
#(The different steps followed are represented for teaching purposes)
## FIRST --------------------------------------------
#### Fit initial FT model with large s
#This arima function belongs to the TSA package
TF.fit <- arima(y.tr,
xtransf = x.tr,
order=c(1,0,0),
seasonal = list(order=c(1,0,0),period=7),
transfer = list(c(0,9),c(0,9),c(0,9)),
method="ML")
## Warning in arima(y.tr, xtransf = x.tr, order = c(1, 0, 0), seasonal =
## list(order = c(1, : possible convergence problem: optim gave code=1
#Check regression error to see the need of differentiation
TF.RegressionError.plot(y.tr,x.tr,TF.fit,lag.max = 50)
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_point()`).

#Regular differentiation is needed and coefficients of explanatory variables cannot be interpreted
## SECOND --------------------------------------------
#### Fit initial FT model with large s
TF.fit <- arima(y.tr,
xtransf = x.tr,
order=c(1,1,0),
seasonal = list(order=c(1,0,0),period=7),
transfer = list(c(0,9),c(0,9),c(0,9)),
method="ML")
summary(TF.fit) #summary of training errors and estimated coefficients
##
## Call:
## arima(x = y.tr, order = c(1, 1, 0), seasonal = list(order = c(1, 0, 0), period = 7),
## method = "ML", xtransf = x.tr, transfer = list(c(0, 9), c(0, 9), c(0, 9)))
##
## Coefficients:
## ar1 sar1 WD-MA0 WD-MA1 WD-MA2 WD-MA3 WD-MA4 WD-MA5
## -0.1456 0.1889 7.6510 0.0654 0.0365 -0.1013 -0.0859 -0.0694
## s.e. 0.0225 0.0225 0.0726 0.0748 0.0840 0.0837 0.0856 0.0856
## WD-MA6 WD-MA7 WD-MA8 WD-MA9 tcold-MA0 tcold-MA1 tcold-MA2
## -0.0785 0.0604 -0.1382 -0.0301 -0.0359 -0.0262 -0.0115
## s.e. 0.0838 0.0840 0.0748 0.0723 0.0022 0.0022 0.0023
## tcold-MA3 tcold-MA4 tcold-MA5 tcold-MA6 tcold-MA7 tcold-MA8
## -0.0107 -0.0050 -0.0064 -0.0089 0.0023 -0.0023
## s.e. 0.0022 0.0022 0.0022 0.0022 0.0023 0.0022
## tcold-MA9 thot-MA0 thot-MA1 thot-MA2 thot-MA3 thot-MA4 thot-MA5
## 0.0010 0.0737 0.0280 0.0120 0.0067 0.0066 0.0105
## s.e. 0.0023 0.0048 0.0048 0.0049 0.0048 0.0048 0.0048
## thot-MA6 thot-MA7 thot-MA8 thot-MA9
## -0.0028 -0.0034 -0.0004 0.0054
## s.e. 0.0048 0.0048 0.0048 0.0047
##
## sigma^2 estimated as 0.01459: log likelihood = 1368.46, aic = -2672.93
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0002706268 0.1207643 0.08918052 -0.01419992 1.294395 0.1853187
## ACF1
## Training set -0.02673016
coeftest(TF.fit) #statistical significance of estimated coefficients
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 -0.1456251 0.0225377 -6.4614 1.037e-10 ***
## sar1 0.1888777 0.0225230 8.3860 < 2.2e-16 ***
## WD-MA0 7.6509799 0.0726264 105.3471 < 2.2e-16 ***
## WD-MA1 0.0653920 0.0748163 0.8740 0.382100
## WD-MA2 0.0365011 0.0839715 0.4347 0.663791
## WD-MA3 -0.1012625 0.0836678 -1.2103 0.226167
## WD-MA4 -0.0858998 0.0856087 -1.0034 0.315668
## WD-MA5 -0.0694331 0.0856499 -0.8107 0.417560
## WD-MA6 -0.0785434 0.0837879 -0.9374 0.348549
## WD-MA7 0.0604056 0.0839908 0.7192 0.472022
## WD-MA8 -0.1381522 0.0748328 -1.8461 0.064871 .
## WD-MA9 -0.0301445 0.0723086 -0.4169 0.676761
## tcold-MA0 -0.0359476 0.0022481 -15.9905 < 2.2e-16 ***
## tcold-MA1 -0.0262455 0.0022365 -11.7349 < 2.2e-16 ***
## tcold-MA2 -0.0115337 0.0022566 -5.1110 3.205e-07 ***
## tcold-MA3 -0.0106870 0.0022327 -4.7866 1.696e-06 ***
## tcold-MA4 -0.0049865 0.0022376 -2.2285 0.025847 *
## tcold-MA5 -0.0063843 0.0022391 -2.8513 0.004354 **
## tcold-MA6 -0.0088805 0.0022370 -3.9698 7.194e-05 ***
## tcold-MA7 0.0023018 0.0022646 1.0164 0.309420
## tcold-MA8 -0.0023003 0.0022419 -1.0261 0.304867
## tcold-MA9 0.0010335 0.0022550 0.4583 0.646729
## thot-MA0 0.0737167 0.0048458 15.2126 < 2.2e-16 ***
## thot-MA1 0.0279656 0.0048468 5.7698 7.934e-09 ***
## thot-MA2 0.0120275 0.0048601 2.4747 0.013334 *
## thot-MA3 0.0067006 0.0047768 1.4027 0.160696
## thot-MA4 0.0065706 0.0048185 1.3636 0.172686
## thot-MA5 0.0105192 0.0048113 2.1864 0.028790 *
## thot-MA6 -0.0027618 0.0047602 -0.5802 0.561792
## thot-MA7 -0.0034115 0.0048115 -0.7090 0.478304
## thot-MA8 -0.0003691 0.0047674 -0.0774 0.938289
## thot-MA9 0.0053618 0.0047445 1.1301 0.258425
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Check regression error to see the need of differentiation
TF.RegressionError.plot(y.tr,x.tr,TF.fit,lag.max = 50)
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_point()`).

#Regression error is stationary, then, we can continue
#NOTE: If this regression error was not stationary in variance,boxcox should be applied to input and output series.
# Try AR(1)7 for seasonal component and AR(2) for regular component?
#Check numerator coefficients of explanatory variable
TF.Identification.plot(x.tr,TF.fit)
## [1] "WD"
## Estimate Std. Error z value Pr(>|z|)
## WD-MA0 7.65097995 0.07262636 105.3471462 0.00000000
## WD-MA1 0.06539198 0.07481626 0.8740344 0.38209955
## WD-MA2 0.03650111 0.08397145 0.4346847 0.66379130
## WD-MA3 -0.10126253 0.08366783 -1.2102922 0.22616677
## WD-MA4 -0.08589976 0.08560871 -1.0033998 0.31566801
## WD-MA5 -0.06943315 0.08564990 -0.8106623 0.41755960
## WD-MA6 -0.07854338 0.08378790 -0.9374073 0.34854910
## WD-MA7 0.06040560 0.08399081 0.7191930 0.47202200
## WD-MA8 -0.13815224 0.07483278 -1.8461461 0.06487099
## WD-MA9 -0.03014448 0.07230859 -0.4168866 0.67676135
## [1] "tcold"
## Estimate Std. Error z value Pr(>|z|)
## tcold-MA0 -0.035947568 0.002248058 -15.9905018 1.488274e-57
## tcold-MA1 -0.026245487 0.002236526 -11.7349368 8.438930e-32
## tcold-MA2 -0.011533690 0.002256645 -5.1109894 3.204759e-07
## tcold-MA3 -0.010687035 0.002232705 -4.7865854 1.696428e-06
## tcold-MA4 -0.004986461 0.002237583 -2.2285031 2.584698e-02
## tcold-MA5 -0.006384309 0.002239058 -2.8513377 4.353570e-03
## tcold-MA6 -0.008880489 0.002237033 -3.9697628 7.194420e-05
## tcold-MA7 0.002301777 0.002264546 1.0164408 3.094195e-01
## tcold-MA8 -0.002300317 0.002241911 -1.0260519 3.048671e-01
## tcold-MA9 0.001033506 0.002255029 0.4583114 6.467287e-01
## [1] "thot"
## Estimate Std. Error z value Pr(>|z|)
## thot-MA0 0.0737167194 0.004845770 15.21259006 2.917674e-52
## thot-MA1 0.0279656091 0.004846852 5.76984956 7.934233e-09
## thot-MA2 0.0120274631 0.004860126 2.47472243 1.333398e-02
## thot-MA3 0.0067005788 0.004776800 1.40273372 1.606963e-01
## thot-MA4 0.0065705912 0.004818478 1.36362370 1.726860e-01
## thot-MA5 0.0105192065 0.004811301 2.18635385 2.878973e-02
## thot-MA6 -0.0027618167 0.004760255 -0.58018253 5.617915e-01
## thot-MA7 -0.0034115020 0.004811487 -0.70903283 4.783041e-01
## thot-MA8 -0.0003690972 0.004767444 -0.07742036 9.382891e-01
## thot-MA9 0.0053618422 0.004744481 1.13012197 2.584248e-01

#WD: b=0, r=0, s=0
#tcold: b=0, r=1, s=0
#thot: b=0, r=1, s=0
## THIRD --------------------------------------------
#### Fit model with selected values and estimate initial ARIMA structure
TF.fit <- arima(y.tr,
xtransf = x.tr,
order=c(2,1,0),
seasonal = list(order=c(1,0,0),period=7),
transfer = list(c(0,0),c(1,0),c(1,0)),
method="ML")
## Warning in log(s2): NaNs produced
## Warning in log(s2): NaNs produced
## Warning in log(s2): NaNs produced
## Warning in log(s2): NaNs produced
## Warning in arima(y.tr, xtransf = x.tr, order = c(2, 1, 0), seasonal =
## list(order = c(1, : possible convergence problem: optim gave code=1
#Check regression error to identify correlation structure
summary(TF.fit) #summary of training errors and estimated coefficients
##
## Call:
## arima(x = y.tr, order = c(2, 1, 0), seasonal = list(order = c(1, 0, 0), period = 7),
## method = "ML", xtransf = x.tr, transfer = list(c(0, 0), c(1, 0), c(1, 0)))
##
## Coefficients:
## ar1 ar2 sar1 WD-MA0 tcold-AR1 tcold-MA0 thot-AR1
## -0.1639 -0.1776 0.1586 7.8178 0.4405 -0.0374 0.3693
## s.e. 0.0237 0.0235 0.0229 0.0398 0.0675 0.0019 0.0416
## thot-MA0
## 0.0718
## s.e. 0.0044
##
## sigma^2 estimated as 0.01485: log likelihood = 1357.61, aic = -2699.21
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.000344201 0.1218135 0.08987042 -0.02001728 1.302992 0.1867523
## ACF1
## Training set -0.009679965
coeftest(TF.fit) #statistical significance of estimated coefficients
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 -0.1639247 0.0237467 -6.9031 5.089e-12 ***
## ar2 -0.1775620 0.0235003 -7.5557 4.165e-14 ***
## sar1 0.1585748 0.0229236 6.9175 4.596e-12 ***
## WD-MA0 7.8178409 0.0397517 196.6667 < 2.2e-16 ***
## tcold-AR1 0.4405121 0.0675286 6.5233 6.876e-11 ***
## tcold-MA0 -0.0373812 0.0019255 -19.4141 < 2.2e-16 ***
## thot-AR1 0.3693012 0.0416196 8.8732 < 2.2e-16 ***
## thot-MA0 0.0717739 0.0043749 16.4060 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Check residuals
CheckResiduals.ICAI(TF.fit, lag=50)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,0)(1,0,0)[7]
## Q* = 112.6, df = 42, p-value = 2.283e-08
##
## Model df: 8. Total lags used: 50
## FOURTH --------------------------------------------
#### lets try MA/(2) instead of AR(2)
#### start with (0,1,2)(1,0,0)7
TF.fit <- arima(y.tr,
xtransf = x.tr,
order=c(0,1,2),
seasonal = list(order=c(1,0,0),period=7),
transfer = list(c(0,0),c(1,0),c(1,0)),
method="ML")
## Warning in arima(y.tr, xtransf = x.tr, order = c(0, 1, 2), seasonal =
## list(order = c(1, : possible convergence problem: optim gave code=1
summary(TF.fit) #summary of training errors and estimated coefficients
##
## Call:
## arima(x = y.tr, order = c(0, 1, 2), seasonal = list(order = c(1, 0, 0), period = 7),
## method = "ML", xtransf = x.tr, transfer = list(c(0, 0), c(1, 0), c(1, 0)))
##
## Coefficients:
## ma1 ma2 sar1 WD-MA0 tcold-AR1 tcold-MA0 thot-AR1
## -0.2348 -0.2400 0.1586 7.7629 0.6552 -0.0382 0.4523
## s.e. 0.0223 0.0239 0.0225 0.0411 0.0208 0.0017 0.0339
## thot-MA0
## 0.0698
## s.e. 0.0041
##
## sigma^2 estimated as 0.0141: log likelihood = 1408.26, aic = -2800.51
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0004318267 0.1187317 0.087793 -0.02405313 1.272522 0.1824354
## ACF1
## Training set 0.01845859
coeftest(TF.fit) #statistical significance of estimated coefficients
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 -0.2347890 0.0223162 -10.5210 < 2.2e-16 ***
## ma2 -0.2399553 0.0239227 -10.0305 < 2.2e-16 ***
## sar1 0.1585683 0.0224745 7.0555 1.72e-12 ***
## WD-MA0 7.7628533 0.0410811 188.9641 < 2.2e-16 ***
## tcold-AR1 0.6551798 0.0207928 31.5099 < 2.2e-16 ***
## tcold-MA0 -0.0382484 0.0016875 -22.6651 < 2.2e-16 ***
## thot-AR1 0.4523263 0.0339400 13.3272 < 2.2e-16 ***
## thot-MA0 0.0697618 0.0041500 16.8101 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Check residuals
CheckResiduals.ICAI(TF.fit, lag=50)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,2)(1,0,0)[7]
## Q* = 92.444, df = 42, p-value = 1.18e-05
##
## Model df: 8. Total lags used: 50
#If residuals are not white noise, change order of ARMA
#Remaning correlations in lags 4 and 5 and 14. Increase orders
## FIFTH --------------------------------------------
#### Try (0,1,5)(2,0,0)7
TF.fit <- arima(y.tr,
xtransf = x.tr,
order=c(0,1,5),
seasonal = list(order=c(2,0,0),period=7),
transfer = list(c(0,0),c(1,0),c(1,0)),
method="ML")
## Warning in arima(y.tr, xtransf = x.tr, order = c(0, 1, 5), seasonal =
## list(order = c(2, : possible convergence problem: optim gave code=1
summary(TF.fit) #summary of training errors and estimated coefficients
##
## Call:
## arima(x = y.tr, order = c(0, 1, 5), seasonal = list(order = c(2, 0, 0), period = 7),
## method = "ML", xtransf = x.tr, transfer = list(c(0, 0), c(1, 0), c(1, 0)))
##
## Coefficients:
## ma1 ma2 ma3 ma4 ma5 sar1 sar2 WD-MA0
## -0.2247 -0.1993 -0.0568 -0.0816 -0.0576 0.1755 0.0458 7.7432
## s.e. 0.0228 0.0234 0.0228 0.0243 0.0230 0.0232 0.0228 0.0442
## tcold-AR1 tcold-MA0 thot-AR1 thot-MA0
## 0.6573 -0.0390 0.4649 0.0687
## s.e. 0.0208 0.0018 0.0347 0.0043
##
## sigma^2 estimated as 0.01385: log likelihood = 1426.54, aic = -2829.09
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0006101027 0.117636 0.08683689 -0.02893619 1.255731 0.1804486
## ACF1
## Training set -0.0001093044
coeftest(TF.fit) #statistical significance of estimated coefficients
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 -0.2246741 0.0228241 -9.8437 < 2.2e-16 ***
## ma2 -0.1992616 0.0233867 -8.5203 < 2.2e-16 ***
## ma3 -0.0567997 0.0227573 -2.4959 0.0125644 *
## ma4 -0.0815808 0.0243499 -3.3504 0.0008071 ***
## ma5 -0.0576490 0.0229939 -2.5071 0.0121710 *
## sar1 0.1755026 0.0232275 7.5558 4.162e-14 ***
## sar2 0.0458038 0.0227501 2.0133 0.0440787 *
## WD-MA0 7.7432120 0.0441636 175.3301 < 2.2e-16 ***
## tcold-AR1 0.6572825 0.0208206 31.5689 < 2.2e-16 ***
## tcold-MA0 -0.0389775 0.0017708 -22.0117 < 2.2e-16 ***
## thot-AR1 0.4649279 0.0347058 13.3963 < 2.2e-16 ***
## thot-MA0 0.0687455 0.0042881 16.0318 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Check residuals
CheckResiduals.ICAI(TF.fit, lag=50)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,5)(2,0,0)[7]
## Q* = 57.782, df = 38, p-value = 0.0208
##
## Model df: 12. Total lags used: 50
#If residuals are not white noise, change order of ARMA
#Correlations are modeled, But many MA terms not significant. Try AR alternative
## SIXTH --------------------------------------------
#### Try (5,1,0)(2,0,0)7
TF.fit <- arima(y.tr,
xtransf = x.tr,
order=c(5,1,0),
seasonal = list(order=c(2,0,0),period=7),
transfer = list(c(0,0),c(1,0),c(1,0)),
method="ML")
summary(TF.fit) #summary of training errors and estimated coefficients
##
## Call:
## arima(x = y.tr, order = c(5, 1, 0), seasonal = list(order = c(2, 0, 0), period = 7),
## method = "ML", xtransf = x.tr, transfer = list(c(0, 0), c(1, 0), c(1, 0)))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 sar1 sar2 WD-MA0
## -0.2169 -0.2415 -0.1363 -0.1487 -0.1268 0.1207 0.0591 7.7456
## s.e. 0.0225 0.0230 0.0235 0.0230 0.0231 0.0236 0.0226 0.0436
## tcold-AR1 tcold-MA0 thot-AR1 thot-MA0
## 0.6584 -0.0386 0.4554 0.0697
## s.e. 0.0203 0.0017 0.0347 0.0042
##
## sigma^2 estimated as 0.0139: log likelihood = 1422.59, aic = -2821.17
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0004034362 0.1178726 0.08688364 -0.02362039 1.258602 0.1805457
## ACF1
## Training set -0.003580911
coeftest(TF.fit) #statistical significance of estimated coefficients
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 -0.2168945 0.0225211 -9.6307 < 2.2e-16 ***
## ar2 -0.2414818 0.0230057 -10.4966 < 2.2e-16 ***
## ar3 -0.1363313 0.0235230 -5.7957 6.805e-09 ***
## ar4 -0.1486906 0.0229862 -6.4687 9.885e-11 ***
## ar5 -0.1267777 0.0231496 -5.4765 4.339e-08 ***
## sar1 0.1207081 0.0235919 5.1165 3.113e-07 ***
## sar2 0.0590879 0.0226338 2.6106 0.009038 **
## WD-MA0 7.7455967 0.0436296 177.5308 < 2.2e-16 ***
## tcold-AR1 0.6584161 0.0203285 32.3888 < 2.2e-16 ***
## tcold-MA0 -0.0385911 0.0017080 -22.5948 < 2.2e-16 ***
## thot-AR1 0.4554008 0.0346716 13.1347 < 2.2e-16 ***
## thot-MA0 0.0696936 0.0042162 16.5299 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Check residuals
CheckResiduals.ICAI(TF.fit, lag=50)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(5,1,0)(2,0,0)[7]
## Q* = 55.53, df = 38, p-value = 0.03296
##
## Model df: 12. Total lags used: 50
#All coefficients significant --> Good.
#All correlations are small --> Good.
#Check Cross correlation residuals - expl. variables
res <- residuals(TF.fit)
res[is.na(res)] <- 0
ccf(y = res, x = x.tr[,1])

ccf(y = res, x = x.tr[,2])

ccf(y = res, x = x.tr[,3])

########
#X1 seems correlated with residuals -> modify TF
## SEVENTH --------------------------------------------
#### Try (5,1,0)(2,0,0)7 and modify TF for X1
TF.fit <- arima(y.tr,
xtransf = x.tr,
order=c(5,1,0),
seasonal = list(order=c(2,0,0),period=7),
transfer = list(c(0,1),c(1,0),c(1,0)),
method="ML")
summary(TF.fit) #summary of training errors and estimated coefficients
##
## Call:
## arima(x = y.tr, order = c(5, 1, 0), seasonal = list(order = c(2, 0, 0), period = 7),
## method = "ML", xtransf = x.tr, transfer = list(c(0, 1), c(1, 0), c(1, 0)))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 sar1 sar2 WD-MA0
## -0.2183 -0.244 -0.1364 -0.1478 -0.1266 0.1241 0.0617 7.7495
## s.e. 0.0225 0.023 0.0235 0.0230 0.0232 0.0237 0.0227 0.0438
## WD-MA1 tcold-AR1 tcold-MA0 thot-AR1 thot-MA0
## 0.0663 0.6578 -0.0384 0.4723 0.0701
## s.e. 0.0437 0.0205 0.0017 0.0343 0.0041
##
## sigma^2 estimated as 0.01388: log likelihood = 1423.61, aic = -2821.22
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0004049858 0.1177685 0.08693924 -0.02056747 1.259037 0.1806613
## ACF1
## Training set -0.003618925
coeftest(TF.fit) #statistical significance of estimated coefficients
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 -0.2183334 0.0224868 -9.7094 < 2.2e-16 ***
## ar2 -0.2440247 0.0230357 -10.5933 < 2.2e-16 ***
## ar3 -0.1363839 0.0235227 -5.7980 6.713e-09 ***
## ar4 -0.1477789 0.0229692 -6.4338 1.245e-10 ***
## ar5 -0.1265691 0.0231653 -5.4637 4.662e-08 ***
## sar1 0.1240664 0.0236593 5.2439 1.572e-07 ***
## sar2 0.0617365 0.0226829 2.7217 0.006494 **
## WD-MA0 7.7495250 0.0437929 176.9583 < 2.2e-16 ***
## WD-MA1 0.0662699 0.0437449 1.5149 0.129794
## tcold-AR1 0.6578258 0.0205477 32.0145 < 2.2e-16 ***
## tcold-MA0 -0.0383677 0.0017128 -22.4003 < 2.2e-16 ***
## thot-AR1 0.4722826 0.0343140 13.7636 < 2.2e-16 ***
## thot-MA0 0.0701416 0.0041342 16.9661 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Check residuals
CheckResiduals.ICAI(TF.fit, lag=50)
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).

##
## Ljung-Box test
##
## data: Residuals from ARIMA(5,1,0)(2,0,0)[7]
## Q* = 53.891, df = 37, p-value = 0.03589
##
## Model df: 13. Total lags used: 50
#All coefficients significant ???
#All correlations are small --> Good.
#Check Cross correlation residuals - expl. variables
res <- residuals(TF.fit)
res[is.na(res)] <- 0
ccf(y = res, x = x.tr[,1]) # --> much better

ccf(y = res, x = x.tr[,2])

ccf(y = res, x = x.tr[,3])

## Finished model identification
#Check fitted
autoplot(y.tr, series = "Real")+
forecast::autolayer(fitted(TF.fit), series = "Fitted")
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

#Chech training errors of the model
accuracy(fitted(TF.fit),y.tr)
## ME RMSE MAE MPE MAPE ACF1
## Test set -0.0004049858 0.1177685 0.08693924 -0.02056747 1.259037 -0.003618925
## Theil's U
## Test set 0.1694156
PlotModelDiagnosis(x.tr, y.tr, fitted(TF.fit), together = TRUE)
## [1] "Residuals vs WD"
## [1] "Residuals vs tcold"
## [1] "Residuals vs thot"
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range (`stat_smooth()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range (`stat_smooth()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range (`stat_smooth()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

#################################################################################
## Forecast for new data with h = 7
#################################################################################
x.tv <- read_excel("DAILY_DEMAND_TV.xlsx")
#divide temperatures for validation
x.tv$tcold <- sapply(x.tv$TEMP,min,17)
x.tv$thot <- sapply(x.tv$TEMP,max,22)
x.tv <- ts(x.tv[,c(2,4,5)])
#Obtain forecast for horizon = 7 using the trained parameters of the model
val.forecast_h7 <- TF.forecast(y.old = y.tr, #past values of the series
x.old = x.tr, #Past values of the explanatory variables
x.new = x.tv, #New values of the explanatory variables
model = TF.fit, #fitted transfer function model
h=7) #Forecast horizon
val.forecast_h7 * 100
## [1] 692.7268 643.1141 770.4921 782.3234 778.0349 656.0698 709.4874